{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Arboles de decisión: Hiperparámetros, Random Forest y Optimización de Parámetros" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "import os\n", "import pandas as pd\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAgeOutcome
061487235033.60.627501
11856629026.60.351310
28183640023.30.672321
318966239428.10.167210
40137403516843.12.288331
\n", "
" ], "text/plain": [ " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n", "0 6 148 72 35 0 33.6 \n", "1 1 85 66 29 0 26.6 \n", "2 8 183 64 0 0 23.3 \n", "3 1 89 66 23 94 28.1 \n", "4 0 137 40 35 168 43.1 \n", "\n", " DiabetesPedigreeFunction Age Outcome \n", "0 0.627 50 1 \n", "1 0.351 31 0 \n", "2 0.672 32 1 \n", "3 0.167 21 0 \n", "4 2.288 33 1 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv(os.path.join('../Datasets/diabetes.csv'))\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(537, 7) (231, 7) (537,) (231,)\n" ] } ], "source": [ "feature_cols = ['Pregnancies', 'Insulin', 'BMI', 'Age','Glucose','BloodPressure','DiabetesPedigreeFunction']\n", "X = df[feature_cols]\n", "Y = df[\"Outcome\"]\n", "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1) # 70% training, 30% test\n", "print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',\n", " max_depth=None, max_features=None, max_leaf_nodes=None,\n", " min_impurity_decrease=0.0, min_impurity_split=None,\n", " min_samples_leaf=1, min_samples_split=2,\n", " min_weight_fraction_leaf=0.0, presort='deprecated',\n", " random_state=None, splitter='best')" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# baseline no incluye poda (max_depth)\n", "treev1 = DecisionTreeClassifier()\n", "treev1.fit(X_train, Y_train)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0,\n", " 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,\n", " 0, 0, 
0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,\n", " 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0,\n", " 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,\n", " 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,\n", " 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0,\n", " 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,\n", " 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0,\n", " 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0])" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Y_pred = treev1.predict(X_test)\n", "Y_pred" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "accuracy del clasificador - version 1 : 0.68\n", "matriz de confusión del clasificador - version 1: \n", " [[111 35]\n", " [ 40 45]]\n", "precision del clasificador - version 1 : 0.56\n", "recall del clasificador - version 1 : 0.53\n", "f1 del clasificador - version 1 : 0.55\n" ] } ], "source": [ "def metricas_desempenio(tree):\n", " print('accuracy del clasificador - version 1 : {0:.2f}'.format(accuracy_score(Y_test, tree.predict(X_test))))\n", " print('matriz de confusión del clasificador - version 1: \\n {0}'.format(confusion_matrix(Y_test, tree.predict(X_test))))\n", " print('precision del clasificador - version 1 : {0:.2f}'.format(precision_score(Y_test, tree.predict(X_test))))\n", " print('recall del clasificador - version 1 : {0:.2f}'.format(recall_score(Y_test, tree.predict(X_test))))\n", " print('f1 del clasificador - version 1 : {0:.2f}'.format(f1_score(Y_test, tree.predict(X_test))))\n", "metricas_desempenio(treev1)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, 
criterion='entropy',\n", " max_depth=3, max_features=None, max_leaf_nodes=None,\n", " min_impurity_decrease=0.0, min_impurity_split=None,\n", " min_samples_leaf=1, min_samples_split=2,\n", " min_weight_fraction_leaf=0.0, presort='deprecated',\n", " random_state=None, splitter='best')" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Ajustar algunos hiperparámetros\n", "tree_v2 = DecisionTreeClassifier(criterion=\"entropy\", max_depth=3)\n", "tree_v2.fit(X_train, Y_train)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "accuracy del clasificador - version 1 : 0.77\n", "matriz de confusión del clasificador - version 1: \n", " [[124 22]\n", " [ 31 54]]\n", "precision del clasificador - version 1 : 0.71\n", "recall del clasificador - version 1 : 0.64\n", "f1 del clasificador - version 1 : 0.67\n" ] } ], "source": [ "metricas_desempenio(tree_v2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Hiperparámetros para ajustar la complejidad del modelo\n", "\n", "__[DecisionTreeClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html)__\n", "* class_weight=None importancia relativa de los valores de clasificación\n", "* criterion='entropy'/'gini'\n", "* max_depth=3 distancia max entre a raiz y las hojas\n", "* max_features=None numero max de variables a considerar\n", "* max_leaf_nodes=20 numero max de hojas\n", "* min_impurity_decrease=0.0\n", "* min_impurity_split=None (Deprecado)\n", "* min_samples_leaf=1 Podar si quedan menos que este numero de ejemplos \n", "* min_samples_split=2 Continuar si quedan al menos esta cantidad de ejemplos\n", "* min_weight_fraction_leaf=0.0 Porcentaje minimo de ejemplo para continuar\n", "\n", "\n", "\n", "\n", "**Más allá de cierto umbral, la complejidad del modelo afecta negativamente el desempeño debido al sobreajuste**" ] }, { "cell_type": 
"markdown", "metadata": {}, "source": [ "## Sobreajuste\n", "\n", "__[Sobreajuste](https://es.wikipedia.org/wiki/Sobreajuste)__\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "**Como evitar?**\n", "En el caso particular de los árboles de decisión, reducir nodos del arbol cuando no incrementan los indicadores con una buena cantidad de datos de prueba (**poda - pruning**)\n", "\n", "### Ensemble learning\n", "\n", "__[Ensemble learning](https://en.wikipedia.org/wiki/Ensemble_learning)__\n", "\n", "\n", "\n", "\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Random Forest\n", "\n", "__[RandomForestClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html)__\n", "\n", "Se crean varios árboles **INDEPENDIENTES** variando los casos/observaciones del conjunto de entrenamiento y/o las variables empleadas durante el proceso de entrenamiento.\n", "\n", "Las predicciones de cada modelo (árbol) tienen el mismo peso y el resultado final es el voto de mayoría\n", "\n", "**Parámetros:**\n", "* **n_estimators** número de clasificadores, árboles en este caso.\n", "\n", "Los valores adecuados para este y otros parámetros se obtienen via experimentación (prueba y error). 
Si es posible, se recomienda tener varios conjuntos de prueba para seleccionar el modelo con el mejor desempeño (promedio) en todos los conjunto de prueba" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,\n", " criterion='gini', max_depth=None, max_features='auto',\n", " max_leaf_nodes=None, max_samples=None,\n", " min_impurity_decrease=0.0, min_impurity_split=None,\n", " min_samples_leaf=1, min_samples_split=2,\n", " min_weight_fraction_leaf=0.0, n_estimators=10,\n", " n_jobs=None, oob_score=False, random_state=None,\n", " verbose=0, warm_start=False)" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "#Ajustar n_estimators puede reducir la posiblidad de overfitting\n", "tree_v3 = RandomForestClassifier(n_estimators=10)\n", "tree_v3" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,\n", " criterion='gini', max_depth=None, max_features='auto',\n", " max_leaf_nodes=None, max_samples=None,\n", " min_impurity_decrease=0.0, min_impurity_split=None,\n", " min_samples_leaf=1, min_samples_split=2,\n", " min_weight_fraction_leaf=0.0, n_estimators=10,\n", " n_jobs=None, oob_score=False, random_state=None,\n", " verbose=0, warm_start=False)" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tree_v3.fit(X_train, Y_train)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "accuracy del clasificador - version 1 : 0.77\n", "matriz de confusión del clasificador - version 1: \n", " [[132 14]\n", " [ 38 47]]\n", "precision del clasificador - version 1 : 0.77\n", "recall del clasificador - version 
1 : 0.55\n", "f1 del clasificador - version 1 : 0.64\n" ] } ], "source": [ "metricas_desempenio(tree_v3)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Gradient Boosted Trees\n", "Los arboles se construyen en **secuencia** a partir de una fracción del conjunto de entrenamiento; la idea central es que el siguiente árbol corrija los errores del anterior:\n", "\n", "Inicialmente, todos los ejemplos tienen la misma probabilidad de ser seleccionados. A partir del segundo árbol, los ejemplos que fueron incorrectamente clasificados por el árbol anterior tienen mayor probabilidad de ser seleccionados. (para detectar patrones que no fueron detectados por el anterior)\n", "\n", "En consecuencia, cada árbol se crea a partir de una fracción diferente del conjunto de entrenamiento. En la colección final, la clasificación de cada árbol tiene un peso mayor en función del desempeño obtenido con el conjunto de entrenamiento.\n", "\n", "__[py-xgboost](https://anaconda.org/anaconda/py-xgboost)__\n", "\n", "__[xgboost](https://xgboost.readthedocs.io/en/latest/python/python_api.html)__\n" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "from xgboost.sklearn import XGBClassifier" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,\n", " colsample_bynode=None, colsample_bytree=None, gamma=None,\n", " gpu_id=None, importance_type='gain', interaction_constraints=None,\n", " learning_rate=None, max_delta_step=None, max_depth=None,\n", " min_child_weight=None, missing=nan, monotone_constraints=None,\n", " n_estimators=10, n_jobs=None, num_parallel_tree=None,\n", " objective='binary:logistic', random_state=None, reg_alpha=None,\n", " reg_lambda=None, scale_pos_weight=None, subsample=None,\n", " tree_method=None, validate_parameters=False, verbosity=None)" ] }, "execution_count": 39, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ "tree_v4 = XGBClassifier(n_estimators=10)\n", "tree_v4" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "XGBClassifier(base_score=0.5, booster=None, colsample_bylevel=1,\n", " colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,\n", " importance_type='gain', interaction_constraints=None,\n", " learning_rate=0.300000012, max_delta_step=0, max_depth=6,\n", " min_child_weight=1, missing=nan, monotone_constraints=None,\n", " n_estimators=10, n_jobs=0, num_parallel_tree=1,\n", " objective='binary:logistic', random_state=0, reg_alpha=0,\n", " reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method=None,\n", " validate_parameters=False, verbosity=None)" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tree_v4.fit(X_train, Y_train)" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "accuracy del clasificador - version 1 : 0.80\n", "matriz de confusión del clasificador - version 1: \n", " [[131 15]\n", " [ 32 53]]\n", "precision del clasificador - version 1 : 0.78\n", "recall del clasificador - version 1 : 0.62\n", "f1 del clasificador - version 1 : 0.69\n" ] } ], "source": [ "metricas_desempenio(tree_v4)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Parámetros que se pueden emplear para evitar sobre ajuste (overfitting) \n", "* **n_estimators**: a mayor cantidad de ejemplos, se puede incrementar el valor n_estimators para evitar sobreajuste\n", "\n", "* **learning_rate**, determina la probabilidad de que un ejemplo sea seleccionado en la siguiente iteracion, se recomienda un valor entre 0.1 - 0.2 para reducir la probabilidad de que se produzca overfitting\n", "\n", "* **subsample**, permite controlar el tamaño de la fracción del conjunto de entrenamiento para cada iteración. 
Mientras más bajo el valor, más probabilidad hay de que los conjuntos de entrenamiento entre iteraciones sean diferentes (a mayor diferencia, menos probabilidad de que se produzca overfitting). Se recomienda valores entre 0.5 - 1.0\n", "\n", "* **colsample_bytree**, permite controlar la fracción de las variables empleadas para entrenar los árboles en cada iteración. Se recomienda valores entre 0.5 - 1.0" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Optimización de parámetros\n", "\n", "Objetivo: encontrar la mejor combinación de hiper-parámetros para obtener el clasificador con el mejor desempeño.\n", "\n", "Para evitar probar manualmente todas las posibles combinaciones de valores para todos los posibles parámetros que resultan en un buen desempeño, se emplean técnicas de optimización para evitar buscar en todo el espacio de posible valores y garantizar al mismo tiempo un buen desempeño del clasificador. \n", "\n", "\n", "__[hyperopt (Distributed Hyperparameter Optimization)](https://github.com/hyperopt/hyperopt)__ es el módulo python que facilita realizar esta tarea." 
] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "#conda install -c conda-forge hyperopt\n", "from hyperopt import fmin, tpe, hp, STATUS_OK,Trials" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "space = {\n", " 'x':hp.quniform('x',-1,1,1), #probar con valores entre -1 y 1, con incrementos de 1 \n", "}" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "def objective(params):\n", " x = int(params['x'])\n", " return {'loss':x ** 2,'status':STATUS_OK} " ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "trials = Trials()" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "100%|██████████| 5/5 [00:00<00:00, 271.82trial/s, best loss: 0.0]\n", "{'x': 0.0}\n" ] } ], "source": [ "best = fmin(objective, space, algo=tpe.suggest, trials=trials, max_evals=5)\n", "print(best)" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "#Probar valores entre 100 - 1000, con incrementos de 1 - con igual probabilidad de ser seleccionado: \n", "#'n_estimators':hp.quniform('n_estimators',100,1000,1)\n", "#Crear un diccionario que contiene la configuración para generar diferentes valores para cada parámetro; en este ejemplo,\n", "#para el algoritmo XGBClassifier.\n", "space = {\n", " 'n_estimators':hp.quniform('n_estimators',100,1000,1), #probar con valores entre 100 y 1000, con incrementos de 1 \n", " 'learning_rate':hp.quniform('learning_rate',0.025,0.5,0.025),\n", " 'max_depth':hp.quniform('max_depth',1,13,1),\n", " 'subsample': hp.quniform('subsample',0.5,1,0.05),\n", " 'colsample_bytree':hp.quniform('colsample_bytree',0.5,1,0.05),\n", " 'nthread':6, #cuando sea posible, paralelizar el procesamiento empleando hasta 6 hilos\n", " 'silent':1 #suprimir los mensajes durante la 
ejecución\n", "}" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "#Es necesario definir una función de manera tal que cuando alcance el valor mínimo, esto implique que el clasificador\n", "#ha alcanzado el mejor desempeño. \n", "#En el ejemplo siguiente, el menor valor posible para esta función (0) se da cuando accuracy = 1.\n", "def objective(params):\n", " params['n_estimators'] = int(params['n_estimators'])\n", " params['max_depth'] = int(params['max_depth']) \n", " clf = XGBClassifier(**params) #https://treyhunner.com/2018/10/asterisks-in-python-what-they-are-and-how-to-use-them/\n", " clf.fit(X_train, Y_train) \n", " accuracy = accuracy_score(Y_test, clf.predict(X_test))\n", " return {'loss': 1 - accuracy, 'status': STATUS_OK}" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "100%|██████████| 100/100 [01:55<00:00, 1.16s/trial, best loss: 0.18181818181818177]\n", "{'colsample_bytree': 0.8, 'learning_rate': 0.025, 'max_depth': 3.0, 'n_estimators': 560.0, 'subsample': 0.6000000000000001}\n" ] } ], "source": [ "#https://github.com/hyperopt/hyperopt/wiki/FMin#12-attaching-extra-information-via-the-trials-object\n", "#fmin Itera 100 veces y retorna la combinación de parámetros que generan el menor valor para la función 'objective'\n", "trials = Trials()\n", "best = fmin(objective,space,algo=tpe.suggest,trials=trials,max_evals=100)\n", "print(best)" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "best['n_estimators'] = int(best['n_estimators'])\n", "best['max_depth'] = int(best['max_depth']) " ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,\n", " colsample_bynode=None, colsample_bytree=0.8, gamma=None,\n", " gpu_id=None, importance_type='gain', 
interaction_constraints=None,\n", " learning_rate=0.025, max_delta_step=None, max_depth=3,\n", " min_child_weight=None, missing=nan, monotone_constraints=None,\n", " n_estimators=560, n_jobs=None, num_parallel_tree=None,\n", " objective='binary:logistic', random_state=None, reg_alpha=None,\n", " reg_lambda=None, scale_pos_weight=None,\n", " subsample=0.6000000000000001, tree_method=None,\n", " validate_parameters=False, verbosity=None)" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tree_v5 = XGBClassifier(**best)\n", "tree_v5" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "XGBClassifier(base_score=0.5, booster=None, colsample_bylevel=1,\n", " colsample_bynode=1, colsample_bytree=0.8, gamma=0, gpu_id=-1,\n", " importance_type='gain', interaction_constraints=None,\n", " learning_rate=0.025, max_delta_step=0, max_depth=3,\n", " min_child_weight=1, missing=nan, monotone_constraints=None,\n", " n_estimators=560, n_jobs=0, num_parallel_tree=1,\n", " objective='binary:logistic', random_state=0, reg_alpha=0,\n", " reg_lambda=1, scale_pos_weight=1, subsample=0.6000000000000001,\n", " tree_method=None, validate_parameters=False, verbosity=None)" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tree_v5.fit(X_train, Y_train)" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "accuracy del clasificador - version 1 : 0.82\n", "matriz de confusión del clasificador - version 1: \n", " [[131 15]\n", " [ 27 58]]\n", "precision del clasificador - version 1 : 0.79\n", "recall del clasificador - version 1 : 0.68\n", "f1 del clasificador - version 1 : 0.73\n" ] } ], "source": [ "metricas_desempenio(tree_v5)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": 
{ "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 2 }